<!DOCTYPE HTML>
<html lang="zh-CN">


<head>
    <meta charset="utf-8">
    <meta name="keywords" content="Keras文本处理, SongX64">
    <meta name="description" content="">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- Zoom is intentionally left enabled (no user-scalable=no) so the page can be magnified. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="renderer" content="webkit|ie-stand|ie-comp">
    <meta name="mobile-web-app-capable" content="yes">
    <meta name="format-detection" content="telephone=no">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="baidu-site-verification" content="code-8GV6uhDsco">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
    <!-- Global site tag (gtag.js) - Google Analytics -->


    <title>Keras文本处理 | SongX64</title>
    <link rel="icon" type="image/png" href="/favicon.png">

    <link rel="stylesheet" type="text/css" href="/libs/awesome/css/all.css">
    <link rel="stylesheet" type="text/css" href="/libs/materialize/materialize.min.css">
    <link rel="stylesheet" type="text/css" href="/libs/aos/aos.css">
    <link rel="stylesheet" type="text/css" href="/libs/animate/animate.min.css">
    <link rel="stylesheet" type="text/css" href="/libs/lightGallery/css/lightgallery.min.css">
    <link rel="stylesheet" type="text/css" href="/css/matery.css">
    <link rel="stylesheet" type="text/css" href="/css/my.css">

    <script src="/libs/jquery/jquery.min.js"></script>

    <meta name="generator" content="Hexo 5.4.0">
    <link rel="alternate" href="/atom.xml" title="SongX64" type="application/atom+xml">

    <!-- Fixed, full-cover page background. Kept inside <head>: a <style> element is not
         allowed between </head> and <body>. -->
    <style>
        body {
            background-image: url(https://gitee.com/songx86/SongPicBed/raw/master/img/fd039245d688d43f5595595b7d1ed21b0ef43b5f.jpg);
            background-repeat: no-repeat;
            background-size: cover;
            background-attachment: fixed;
        }
    </style>
</head>



<body>
    <header class="navbar-fixed">
    <nav id="headNav" class="bg-color nav-transparent">
        <div id="navContainer" class="nav-wrapper container">
            <div class="brand-logo">
                <a href="/" class="waves-effect waves-light">
                    
                    <img src="/medias/logo.png" class="logo-img" alt="LOGO">
                    
                    <span class="logo-span">SongX64</span>
                </a>
            </div>
            

<!-- Icon-only trigger targeting #mobile-nav; aria-label supplies the accessible name the icon lacks. -->
<a href="#" data-target="mobile-nav" class="sidenav-trigger button-collapse" aria-label="打开导航菜单"><i class="fas fa-bars" aria-hidden="true"></i></a>
<ul class="right nav-menu">
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/" class="waves-effect waves-light">
      
      <i class="fas fa-home" style="zoom: 0.6;"></i>
      
      <span>首页</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/tags" class="waves-effect waves-light">
      
      <i class="fas fa-tags" style="zoom: 0.6;"></i>
      
      <span>标签</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/categories" class="waves-effect waves-light">
      
      <i class="fas fa-bookmark" style="zoom: 0.6;"></i>
      
      <span>分类</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/archives" class="waves-effect waves-light">
      
      <i class="fas fa-archive" style="zoom: 0.6;"></i>
      
      <span>归档</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/about" class="waves-effect waves-light">
      
      <i class="fas fa-user-circle" style="zoom: 0.6;"></i>
      
      <span>关于</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/contact" class="waves-effect waves-light">
      
      <i class="fas fa-comments" style="zoom: 0.6;"></i>
      
      <span>留言板</span>
    </a>
    
  </li>
  
  <li class="hide-on-med-and-down nav-item">
    
    <a href="/friends" class="waves-effect waves-light">
      
      <i class="fas fa-address-book" style="zoom: 0.6;"></i>
      
      <span>友情链接</span>
    </a>
    
  </li>
  
  <li>
    <!-- Icon-only link targeting #searchModal; aria-label gives it an accessible name
         (a title on the icon alone is not reliably announced). -->
    <a href="#searchModal" class="modal-trigger waves-effect waves-light" aria-label="搜索">
      <i id="searchIcon" class="fas fa-search" title="搜索" style="zoom: 0.85;" aria-hidden="true"></i>
    </a>
  </li>
</ul>


<div id="mobile-nav" class="side-nav sidenav">

    <div class="mobile-head bg-color">
        <!-- Decorative logo: the site name follows as text, so alt is intentionally empty. -->
        <img src="/medias/logo.png" class="logo-img circle responsive-img" alt="">
        <div class="logo-name">SongX64</div>
        <div class="logo-desc">
            Never really desperate, only the lost of the soul.
        </div>
    </div>

    

    <ul class="menu-list mobile-menu-list">
        
        <li class="m-nav-item">
	  
		<a href="/" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-home"></i>
			
			首页
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/tags" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-tags"></i>
			
			标签
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/categories" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-bookmark"></i>
			
			分类
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/archives" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-archive"></i>
			
			归档
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/about" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-user-circle"></i>
			
			关于
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/contact" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-comments"></i>
			
			留言板
		</a>
          
        </li>
        
        <li class="m-nav-item">
	  
		<a href="/friends" class="waves-effect waves-light">
			
			    <i class="fa-fw fas fa-address-book"></i>
			
			友情链接
		</a>
          
        </li>
        
        
        <li><div class="divider"></div></li>
        <li>
            <!-- External link opened in a new tab; rel="noopener noreferrer" prevents the
                 opened page from accessing window.opener. -->
            <a href="https://github.com/blinkfox/hexo-theme-matery" class="waves-effect waves-light" target="_blank" rel="noopener noreferrer">
                <i class="fab fa-github-square fa-fw" aria-hidden="true"></i>Fork Me
            </a>
        </li>
        
    </ul>
</div>


        </div>

        
            <style>
    /* Styles for the "Fork Me" GitHub corner ribbon in the top-right of the nav. */

    /* Hide the ribbon whenever it sits under a .nav-transparent ancestor. */
    .nav-transparent .github-corner {
        display: none !important;
    }

    /* Pin the ribbon to the top-right corner and enlarge it slightly. */
    .github-corner {
        position: absolute;
        z-index: 10;
        top: 0;
        right: 0;
        border: 0;
        transform: scale(1.1);
    }

    /* `color` feeds the paths drawn with fill="currentColor"; `fill` paints the remaining path. */
    .github-corner svg {
        color: #0f9d58;
        fill: #fff;
        height: 64px;
        width: 64px;
    }

    /* Play the arm animation once while the ribbon is hovered. */
    .github-corner:hover .octo-arm {
        animation: a 0.56s ease-in-out;
    }

    /* No animation when not hovered. */
    .github-corner .octo-arm {
        animation: none;
    }

    /* Arm wave: swings between -25deg and 10deg, starting and ending at rest. */
    @keyframes a {
        0%,
        to {
            transform: rotate(0);
        }
        20%,
        60% {
            transform: rotate(-25deg);
        }
        40%,
        80% {
            transform: rotate(10deg);
        }
    }
</style>

<!-- "Fork Me" corner ribbon. The SVG is aria-hidden, so aria-label provides the link's
     accessible name; rel="noopener noreferrer" hardens the new-tab external link. -->
<a href="https://github.com/blinkfox/hexo-theme-matery" class="github-corner tooltipped hide-on-med-and-down" target="_blank"
   rel="noopener noreferrer" aria-label="Fork Me"
   data-tooltip="Fork Me" data-position="left" data-delay="50">
    <svg viewBox="0 0 250 250" aria-hidden="true">
        <path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path>
        <path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2"
              fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path>
        <path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z"
              fill="currentColor" class="octo-body"></path>
    </svg>
</a>
        
    </nav>

</header>

    



<!-- Post cover banner: the cover image is applied as an inline background-image and the
     post title is rendered as a centered heading on top of it. -->
<div class="bg-cover pd-header post-cover" style="background-image: url('/medias/featureimages/106.jpg')">
    <div class="container" style="right: 0px;left: 0px;">
        <div class="row">
            <div class="col s12 m12 l12">
                <div class="brand">
                    <h1 class="description center-align post-title">Keras文本处理</h1>
                </div>
            </div>
        </div>
    </div>
</div>




<main class="post-container content">

    
    <link rel="stylesheet" href="/libs/tocbot/tocbot.css">
<style>
    #articleContent h1::before,
    #articleContent h2::before,
    #articleContent h3::before,
    #articleContent h4::before,
    #articleContent h5::before,
    #articleContent h6::before {
        display: block;
        content: " ";
        height: 100px;
        margin-top: -100px;
        visibility: hidden;
    }

    /* Hide the focus outline only when :focus-visible does not match, so keyboard users
       keep a visible focus indicator inside the article. */
    #articleContent :focus:not(:focus-visible) {
        outline: none;
    }

    .toc-fixed {
        position: fixed;
        top: 64px;
    }

    .toc-widget {
        width: 345px;
        padding-left: 20px;
    }

    .toc-widget .toc-title {
        padding: 35px 0 15px 17px;
        font-size: 1.5rem;
        font-weight: bold;
        line-height: 1.5rem;
    }

    .toc-widget ol {
        padding: 0;
        list-style: none;
    }

    #toc-content {
        padding-bottom: 30px;
        overflow: auto;
    }

    #toc-content ol {
        padding-left: 10px;
    }

    #toc-content ol li {
        padding-left: 10px;
    }

    #toc-content .toc-link:hover {
        color: #42b983;
        font-weight: 700;
        text-decoration: underline;
    }

    #toc-content .toc-link::before {
        background-color: transparent;
        max-height: 25px;

        position: absolute;
        right: 23.5vw;
        display: block;
    }

    #toc-content .is-active-link {
        color: #42b983;
    }

    #floating-toc-btn {
        position: fixed;
        right: 15px;
        bottom: 76px;
        padding-top: 15px;
        margin-bottom: 0;
        z-index: 998;
    }

    #floating-toc-btn .btn-floating {
        width: 48px;
        height: 48px;
    }

    #floating-toc-btn .btn-floating i {
        line-height: 48px;
        font-size: 1.4rem;
    }
</style>
<div class="row">
    <div id="main-content" class="col s12 m12 l9">
        <!-- 文章内容详情 -->
<div id="artDetail">
    <div class="card">
        <div class="card-content article-info">
            <div class="row tag-cate">
                <div class="col s7">
                    
                    <div class="article-tag">
                        
                            <a href="/tags/%E7%A7%91%E7%A0%94/">
                                <span class="chip bg-color">科研</span>
                            </a>
                        
                            <a href="/tags/%E5%AE%9E%E9%AA%8C/">
                                <span class="chip bg-color">实验</span>
                            </a>
                        
                            <a href="/tags/Python/">
                                <span class="chip bg-color">Python</span>
                            </a>
                        
                            <a href="/tags/Keras/">
                                <span class="chip bg-color">Keras</span>
                            </a>
                        
                            <a href="/tags/%E6%96%87%E6%9C%AC%E5%A4%84%E7%90%86/">
                                <span class="chip bg-color">文本处理</span>
                            </a>
                        
                    </div>
                    
                </div>
                <div class="col s5 right-align">
                    
                    <div class="post-cate">
                        <i class="fas fa-bookmark fa-fw icon-category"></i>
                        
                            <a href="/categories/%E7%A7%91%E7%A0%94/" class="post-category">
                                科研
                            </a>
                        
                    </div>
                    
                </div>
            </div>

            <div class="post-info">
                
                <div class="post-date info-break-policy">
                    <!-- <time> exposes the publish date in machine-readable form. -->
                    <i class="far fa-calendar-minus fa-fw"></i>发布日期:&nbsp;&nbsp;
                    <time datetime="2020-07-22">2020-07-22</time>
                </div>
                

                

                
                <div class="info-break-policy">
                    <i class="far fa-file-word fa-fw"></i>文章字数:&nbsp;&nbsp;
                    2k
                </div>
                

                
                <div class="info-break-policy">
                    <i class="far fa-clock fa-fw"></i>阅读时长:&nbsp;&nbsp;
                    8 分
                </div>
                

                
            </div>
        </div>
        <hr class="clearfix">

        
        <!-- 是否加载使用自带的 prismjs. -->
        <link rel="stylesheet" href="/libs/prism/prism.css">
        

        

        <div class="card-content article-card-content">
            <div id="articleContent">
                <h1 id="Keras文本处理"><a href="#Keras文本处理" class="headerlink" title="Keras文本处理"></a>Keras文本处理</h1><h2 id="任务"><a href="#任务" class="headerlink" title="任务"></a>任务</h2><p><u>1.只用带标签的数据和标签 不要关键字 用LSTM分一次类 Tokenizer</u> </p>
<p><u>2.Word2vec</u></p>
<p>3.用标签 + 关键字  用1、2准确度高的方法做  cnn lstm双输入，智能合约放lstm，关键字放CNN</p>
<p>4.把dataset没有标签的分配标签和关键字，重复上面三个</p>
<p>5.选出文件最大的前几个，执行4</p>
<p>结果：  召回率   准确率   F1       迭代次数 2或5代</p>
<h2 id="Word2Vec"><a href="#Word2Vec" class="headerlink" title="Word2Vec"></a>Word2Vec</h2><h2 id="Tokenizer"><a href="#Tokenizer" class="headerlink" title="Tokenizer"></a>Tokenizer</h2><p><a target="_blank" rel="noopener" href="https://keras-cn.readthedocs.io/en/latest/preprocessing/text/#_1">Keras中文文档——关于文本预处理</a></p>
<p>Tokenizer是一个用于向量化文本，或<strong>将文本转换为序列</strong>（即<strong>单个字词以及对应下标构成的列表，从1算起</strong>）的类。是用来文本预处理的第一步：<strong>分词</strong>。结合简单形象的例子会更加好理解些。</p>
<h3 id="1-语法"><a href="#1-语法" class="headerlink" title="1.语法"></a>1.语法</h3><p>官方语法如下：</p>
<pre class="line-numbers language-python" data-language="python"><code class="language-python">keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>text<span class="token punctuation">.</span>Tokenizer<span class="token punctuation">(</span>num_words<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
                                   filters<span class="token operator">=</span><span class="token string">'!"#$%&amp;()*+,-./:;&lt;=>?@[\]^_`&#123;|&#125;~\t\n'</span><span class="token punctuation">,</span>
                                   lower<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
                                   split<span class="token operator">=</span><span class="token string">" "</span><span class="token punctuation">,</span>
                                   char_level<span class="token operator">=</span><span class="token boolean">False</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span></span></code></pre>

<h4 id="1-1-构造参数"><a href="#1-1-构造参数" class="headerlink" title="1.1 构造参数"></a>1.1 构造参数</h4><ul>
<li>num_words： <strong>默认是None</strong>，处理所有字词。但是如果设置成<strong>一个整数</strong>，那么最后返回的是最常见的、出现<strong>频率最高的num_words个字词</strong>。</li>
<li>filters：<strong>过滤一些特殊字符</strong>，默认上文的写法就可以了。</li>
<li>lower：全部转为<strong>小写</strong></li>
<li>split：字符串，单词的<strong>分隔符</strong>，如空格</li>
<li><u><em>char_level: 如果为 True, 每个字符将被视为一个标记</em></u></li>
</ul>
<h4 id="1-2-返回值"><a href="#1-2-返回值" class="headerlink" title="1.2 返回值"></a>1.2 返回值</h4><p>字符串列表</p>
<h4 id="1-3-类方法"><a href="#1-3-类方法" class="headerlink" title="1.3 类方法"></a>1.3 类方法</h4><p>下面是相关的类方法，部分示例在下一节中均有描述应用。</p>
<table>
<thead>
<tr>
<th>方法</th>
<th align="left">参数</th>
<th>返回值</th>
<th>备注</th>
</tr>
</thead>
<tbody><tr>
<td>fit_on_texts(texts)</td>
<td align="left">texts：要用以训练的文本列表</td>
<td></td>
<td></td>
</tr>
<tr>
<td>texts_to_sequences(texts)</td>
<td align="left">texts：待转为序列的文本列表</td>
<td>序列的列表，列表中每个序列对应于一段输入文本</td>
<td></td>
</tr>
<tr>
<td>texts_to_sequences_generator(texts)</td>
<td align="left">texts：待转为序列的文本列表</td>
<td>每次调用返回对应于一段输入文本的序列</td>
<td>本函数是<code>texts_to_sequences</code>的生成器函数版</td>
</tr>
<tr>
<td>texts_to_matrix(texts, mode)</td>
<td align="left">texts：待向量化的文本列表 mode：‘binary’，‘count’，‘tfidf’，‘freq’之一，默认为‘binary’</td>
<td>形如<code>(len(texts), nb_words)</code>的numpy array</td>
<td></td>
</tr>
<tr>
<td>fit_on_sequences(sequences)</td>
<td align="left">sequences：要用以训练的序列列表</td>
<td></td>
<td></td>
</tr>
<tr>
<td>sequences_to_matrix(sequences)</td>
<td align="left">sequences：待向量化的序列列表 mode：‘binary’，‘count’，‘tfidf’，‘freq’之一，默认为‘binary’</td>
<td>形如<code>(len(sequences), nb_words)</code>的numpy array</td>
<td></td>
</tr>
</tbody></table>
<h4 id="1-4-属性"><a href="#1-4-属性" class="headerlink" title="1.4 属性"></a>1.4 属性</h4><ul>
<li>word_counts:字典，将单词（字符串）映射为它们在<strong>训练期间出现的次数</strong>。仅在调用fit_on_texts之后设置。</li>
<li>word_docs: 字典，将单词（字符串）映射为它们在<strong>训练期间所出现的文档或文本的数量</strong>。仅在调用fit_on_texts之后设置。</li>
<li>word_index: 字典，将单词（字符串）映射为它们的<strong>排名或者索引</strong>。仅在调用fit_on_texts之后设置。</li>
<li>document_count: 整数。分词器被训练的文档（文本或者序列）数量。仅在调用fit_on_texts或fit_on_sequences之后设置。</li>
</ul>
<h3 id="2-简单示例"><a href="#2-简单示例" class="headerlink" title="2.简单示例"></a>2.简单示例</h3>  <pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token operator">>></span><span class="token operator">></span><span class="token keyword">from</span> keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>text <span class="token keyword">import</span> Tokenizer
Using TensorFlow backend<span class="token punctuation">.</span>

<span class="token comment">#  创建分词器 Tokenizer 对象</span>
<span class="token operator">>></span><span class="token operator">></span>tokenizer <span class="token operator">=</span> Tokenizer<span class="token punctuation">(</span><span class="token punctuation">)</span>

<span class="token comment">#  text</span>
<span class="token operator">>></span><span class="token operator">></span>text <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">"今天 北京 下 雨 了"</span><span class="token punctuation">,</span> <span class="token string">"我 今天 加班 了"</span><span class="token punctuation">]</span>

<span class="token comment">#  fit_on_texts 方法, 将要训练的文本传入</span>
<span class="token operator">>></span><span class="token operator">></span>tokenizer<span class="token punctuation">.</span>fit_on_texts<span class="token punctuation">(</span>text<span class="token punctuation">)</span>

<span class="token comment">#  word_counts属性，将单词（字符串）映射为它们在训练期间出现的次数。仅在调用fit_on_texts之后设置。</span>
<span class="token operator">>></span><span class="token operator">></span>tokenizer<span class="token punctuation">.</span>word_counts
OrderedDict<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">(</span><span class="token string">'今天'</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
             <span class="token punctuation">(</span><span class="token string">'北京'</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
             <span class="token punctuation">(</span><span class="token string">'下'</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
             <span class="token punctuation">(</span><span class="token string">'雨'</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
             <span class="token punctuation">(</span><span class="token string">'了'</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
             <span class="token punctuation">(</span><span class="token string">'我'</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">,</span>
             <span class="token punctuation">(</span><span class="token string">'加班'</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">)</span><span class="token punctuation">]</span><span class="token punctuation">)</span>

<span class="token comment">#  word_docs属性，将单词（字符串）映射为它们在训练期间所出现的文档或文本的数量。仅在调用fit_on_texts之后设置。</span>
<span class="token operator">>></span><span class="token operator">></span>tokenizer<span class="token punctuation">.</span>word_docs
defaultdict<span class="token punctuation">(</span><span class="token builtin">int</span><span class="token punctuation">,</span> <span class="token punctuation">&#123;</span><span class="token string">'下'</span><span class="token punctuation">:</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token string">'北京'</span><span class="token punctuation">:</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token string">'今天'</span><span class="token punctuation">:</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token string">'雨'</span><span class="token punctuation">:</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token string">'了'</span><span class="token punctuation">:</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token string">'我'</span><span class="token punctuation">:</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token string">'加班'</span><span class="token punctuation">:</span> <span class="token number">1</span><span class="token punctuation">&#125;</span><span class="token punctuation">)</span>

<span class="token comment">#  word_index属性，将单词（字符串）映射为它们的排名或者索引。仅在调用fit_on_texts之后设置。</span>
<span class="token operator">>></span><span class="token operator">></span>tokenizer<span class="token punctuation">.</span>word_index
<span class="token punctuation">&#123;</span><span class="token string">'今天'</span><span class="token punctuation">:</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token string">'了'</span><span class="token punctuation">:</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token string">'北京'</span><span class="token punctuation">:</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token string">'下'</span><span class="token punctuation">:</span> <span class="token number">4</span><span class="token punctuation">,</span> <span class="token string">'雨'</span><span class="token punctuation">:</span> <span class="token number">5</span><span class="token punctuation">,</span> <span class="token string">'我'</span><span class="token punctuation">:</span> <span class="token number">6</span><span class="token punctuation">,</span> <span class="token string">'加班'</span><span class="token punctuation">:</span> <span class="token number">7</span><span class="token punctuation">&#125;</span>

<span class="token comment">#  document_count属性</span>
<span class="token operator">>></span><span class="token operator">></span>tokenizer<span class="token punctuation">.</span>document_count
<span class="token number">2</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>

<h3 id="3-常用示例"><a href="#3-常用示例" class="headerlink" title="3.常用示例"></a>3.常用示例</h3><p>  还以上面的tokenizer对象为基础，经常会使用<code>texts_to_sequences()</code>方法 和 序列预处理方法 <code>keras.preprocessing.sequence.pad_sequences</code>一起使用。</p>
<p>Code.3.1 常用示例</p>
<pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token operator">>></span><span class="token operator">></span>tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">"下 雨 我 加班"</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
<span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">,</span> <span class="token number">6</span><span class="token punctuation">,</span> <span class="token number">7</span><span class="token punctuation">]</span><span class="token punctuation">]</span>

<span class="token operator">>></span><span class="token operator">></span>keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token string">"下 雨 我 加班"</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">,</span> maxlen<span class="token operator">=</span><span class="token number">20</span><span class="token punctuation">)</span>
array<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">,</span> <span class="token number">6</span><span class="token punctuation">,</span> <span class="token number">7</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">,</span>dtype<span class="token operator">=</span>int32<span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span></span></code></pre>

<p>有关pad_sequences用法见python函数——序列预处理pad_sequences()序列填充</p>
<blockquote>
<h2 id="序列预处理pad-sequences-序列填充"><a href="#序列预处理pad-sequences-序列填充" class="headerlink" title="序列预处理pad_sequences()序列填充"></a><a target="_blank" rel="noopener" href="https://blog.csdn.net/wcy23580/article/details/84957471">序列预处理pad_sequences()序列填充</a></h2><p>为了实现的简便，keras只能接受长度相同的序列输入。因此如果目前序列长度参差不齐，这时需要使用<strong>pad_sequences()</strong>。该函数是<strong>将序列转化为经过填充以后的一个长度相同的新序列</strong>。</p>
<h2 id="语法"><a href="#语法" class="headerlink" title="语法"></a>语法</h2><pre class="line-numbers language-python" data-language="python"><code class="language-python">keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>sequences<span class="token punctuation">,</span> 
	maxlen<span class="token operator">=</span><span class="token boolean">None</span><span class="token punctuation">,</span>
	dtype<span class="token operator">=</span><span class="token string">'int32'</span><span class="token punctuation">,</span>
	padding<span class="token operator">=</span><span class="token string">'pre'</span><span class="token punctuation">,</span>
	truncating<span class="token operator">=</span><span class="token string">'pre'</span><span class="token punctuation">,</span> 
	value<span class="token operator">=</span><span class="token number">0</span><span class="token punctuation">.</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>

<h3 id="参数"><a href="#参数" class="headerlink" title="参数"></a>参数</h3><ul>
<li>sequences：浮点数或整数构成的两层嵌套列表</li>
<li>maxlen：None或整数，为序列的最大长度。大于此长度的序列将被截短，小于此长度的序列将被填0（默认在序列前部填充，可通过padding参数改为在后部填充）。</li>
<li>dtype：返回的numpy array的数据类型</li>
<li>padding：‘pre’或‘post’，确定当需要补0时，在序列的起始还是结尾补0</li>
<li>truncating：‘pre’或‘post’，确定当需要截断序列时，从起始还是结尾截断</li>
<li>value：浮点数，此值将在填充时代替默认的填充值0</li>
</ul>
<h3 id="返回值"><a href="#返回值" class="headerlink" title="返回值"></a>返回值</h3><p>返回的是个2维张量，长度为maxlen</p>
<h2 id="实例"><a href="#实例" class="headerlink" title="实例"></a>实例</h2><pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token operator">>></span><span class="token operator">></span>list_1 <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">,</span><span class="token number">3</span><span class="token punctuation">,</span><span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">]</span>
<span class="token operator">>></span><span class="token operator">></span>keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>list_1<span class="token punctuation">,</span> maxlen<span class="token operator">=</span><span class="token number">10</span><span class="token punctuation">)</span>
array<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">,</span> dtype<span class="token operator">=</span>int32<span class="token punctuation">)</span>

<span class="token operator">>></span><span class="token operator">></span>list_2 <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">,</span><span class="token number">2</span><span class="token punctuation">,</span><span class="token number">3</span><span class="token punctuation">,</span><span class="token number">4</span><span class="token punctuation">,</span><span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">]</span>
<span class="token operator">>></span><span class="token operator">></span>keras<span class="token punctuation">.</span>preprocessing<span class="token punctuation">.</span>sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>list_2<span class="token punctuation">,</span> maxlen<span class="token operator">=</span><span class="token number">10</span><span class="token punctuation">)</span>
array<span class="token punctuation">(</span><span class="token punctuation">[</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">1</span><span class="token punctuation">,</span> <span class="token number">2</span><span class="token punctuation">,</span> <span class="token number">3</span><span class="token punctuation">,</span> <span class="token number">4</span><span class="token punctuation">,</span> <span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">]</span><span class="token punctuation">,</span> dtype<span class="token operator">=</span>int32<span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
</blockquote>
<h1 id="过程总结："><a href="#过程总结：" class="headerlink" title="过程总结："></a>过程总结：</h1><pre><code>1. 下载数据集（或者从文件中加载），读取到一个变量/数组中
2. 使用Tokenizer，将数组fit进去，生成一个字典
3. 再使用tokenizer.texts_to_sequences，将数据放进去生成序列（数字列表）
4. 然后可以使用pad_sequences对生成的序列进行长度控制，让他们长度一致
5. 建立模型，模型中第一个是Embedding词嵌入层，将数字列表转换为向量
6. 然后中间就开始搭建神经网络，训练，评测即可。
</code></pre>
<h2 id="具体："><a href="#具体：" class="headerlink" title="具体："></a>具体：</h2><h3 id="Tokenizer函数"><a href="#Tokenizer函数" class="headerlink" title="Tokenizer函数"></a>Tokenizer函数</h3><pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token comment"># tokenizer</span>
<span class="token keyword">def</span> <span class="token function">get_data</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token builtin">all</span> <span class="token operator">=</span> train_x
    tokenizer <span class="token operator">=</span> Tokenizer<span class="token punctuation">(</span>num_words<span class="token operator">=</span><span class="token number">80000</span><span class="token punctuation">)</span>  <span class="token comment"># 分词MAX_NB_WORDS</span>
    
    tokenizer<span class="token punctuation">.</span>fit_on_texts<span class="token punctuation">(</span>train_x<span class="token punctuation">)</span>  <span class="token comment"># 将数据放入</span>
    
    sequences_context <span class="token operator">=</span> tokenizer<span class="token punctuation">.</span>texts_to_sequences<span class="token punctuation">(</span>train_x<span class="token punctuation">)</span>  <span class="token comment"># 受num_words影响</span>

    <span class="token comment"># 词语数据，截取长度</span>
    data_context <span class="token operator">=</span> sequence<span class="token punctuation">.</span>pad_sequences<span class="token punctuation">(</span>sequences_context<span class="token punctuation">,</span> maxlen<span class="token operator">=</span><span class="token number">500</span><span class="token punctuation">)</span>  <span class="token comment"># 将长度不足 500 的新闻用 0 填充（在前端填充）</span>

    <span class="token comment"># 标签，改为one-hot</span>
    labels <span class="token operator">=</span> utils<span class="token punctuation">.</span>to_categorical<span class="token punctuation">(</span>np<span class="token punctuation">.</span>asarray<span class="token punctuation">(</span>label_y<span class="token punctuation">)</span><span class="token punctuation">)</span>  <span class="token comment"># 最后将标签处理成 one-hot 向量，比如 6 变成了 [0,0,0,0,0,0,1,0,0,0,0,0,0]，</span>
    labels_test <span class="token operator">=</span> utils<span class="token punctuation">.</span>to_categorical<span class="token punctuation">(</span>np<span class="token punctuation">.</span>asarray<span class="token punctuation">(</span>label_y<span class="token punctuation">)</span><span class="token punctuation">)</span>
    
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'Shape of data tensor:'</span><span class="token punctuation">,</span> data_context<span class="token punctuation">.</span>shape<span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'Shape of label tensor:'</span><span class="token punctuation">,</span> labels<span class="token punctuation">.</span>shape<span class="token punctuation">)</span>
    <span class="token keyword">return</span> data_context<span class="token punctuation">,</span> labels
<span class="token comment"># 不用feature</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<p>这时候就将文本转换为数字列表了。因为神经网络里面只能输入数字。</p>
<h3 id="搭建模型，训练数据"><a href="#搭建模型，训练数据" class="headerlink" title="搭建模型，训练数据"></a>搭建模型，训练数据</h3><h4 id="划分为训练集和测试集"><a href="#划分为训练集和测试集" class="headerlink" title="划分为训练集和测试集"></a>划分为训练集和测试集</h4><pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token comment"># 训练模型，并保存</span>
<span class="token keyword">def</span> <span class="token function">train</span><span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token comment"># 通过tokenizer取得数据，标签</span>
    data_context<span class="token punctuation">,</span> labels <span class="token operator">=</span> data_vector<span class="token punctuation">.</span>get_data<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token comment"># 划分训练和测试集</span>
    x_train<span class="token punctuation">,</span> x_test<span class="token punctuation">,</span> y_train<span class="token punctuation">,</span> y_test <span class="token operator">=</span> train_test_split<span class="token punctuation">(</span>data_context<span class="token punctuation">,</span> labels<span class="token punctuation">,</span> test_size<span class="token operator">=</span><span class="token number">0.2</span><span class="token punctuation">)</span>
    <span class="token comment"># 这里搭建好模型就直接一起训练了</span>
    train_lstm<span class="token punctuation">(</span>x_train<span class="token punctuation">,</span> y_train<span class="token punctuation">,</span> x_test<span class="token punctuation">,</span> y_test<span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>

<p>搭建模型和训练的代码放一个函数里了：</p>
<h4 id="搭建模型"><a href="#搭建模型" class="headerlink" title="搭建模型"></a>搭建模型</h4><pre class="line-numbers language-python" data-language="python"><code class="language-python">
<span class="token comment">##定义网络结构</span>
<span class="token keyword">def</span> <span class="token function">train_lstm</span><span class="token punctuation">(</span>x_train<span class="token punctuation">,</span> y_train<span class="token punctuation">,</span> x_test<span class="token punctuation">,</span> y_test<span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'train'</span><span class="token punctuation">)</span>
    model <span class="token operator">=</span> Sequential<span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment"># or Graph or whatever</span>
    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>Embedding<span class="token punctuation">(</span>output_dim<span class="token operator">=</span>vocab_dim<span class="token punctuation">,</span>
                        input_dim<span class="token operator">=</span>input_dim<span class="token punctuation">,</span>
                        mask_zero<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">,</span>
                        input_length<span class="token operator">=</span>input_length<span class="token punctuation">)</span><span class="token punctuation">)</span>  <span class="token comment"># Adding Input Length</span>
    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>LSTM<span class="token punctuation">(</span>units<span class="token operator">=</span><span class="token number">50</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>Dropout<span class="token punctuation">(</span><span class="token number">0.5</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>Dense<span class="token punctuation">(</span><span class="token number">16</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
    model<span class="token punctuation">.</span>add<span class="token punctuation">(</span>Activation<span class="token punctuation">(</span><span class="token string">'softmax'</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
<span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<h4 id="训练模型"><a href="#训练模型" class="headerlink" title="训练模型"></a>训练模型</h4><pre class="line-numbers language-python" data-language="python"><code class="language-python">metrics <span class="token operator">=</span> Metrics<span class="token punctuation">(</span><span class="token punctuation">)</span>

model<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span>loss<span class="token operator">=</span><span class="token string">'binary_crossentropy'</span><span class="token punctuation">,</span>
              optimizer<span class="token operator">=</span><span class="token string">'adam'</span><span class="token punctuation">,</span> metrics<span class="token operator">=</span><span class="token punctuation">[</span><span class="token string">'accuracy'</span><span class="token punctuation">]</span><span class="token punctuation">)</span>

model<span class="token punctuation">.</span>fit<span class="token punctuation">(</span>x_train<span class="token punctuation">,</span> y_train<span class="token punctuation">,</span>
          batch_size<span class="token operator">=</span>batch_size<span class="token punctuation">,</span>
          epochs<span class="token operator">=</span>n_epoch<span class="token punctuation">,</span>
          validation_data<span class="token operator">=</span><span class="token punctuation">(</span>x_test<span class="token punctuation">,</span> y_test<span class="token punctuation">)</span><span class="token punctuation">,</span>
          callbacks<span class="token operator">=</span><span class="token punctuation">[</span>metrics<span class="token punctuation">]</span><span class="token punctuation">)</span>
score<span class="token punctuation">,</span> acc <span class="token operator">=</span> model<span class="token punctuation">.</span>evaluate<span class="token punctuation">(</span>x_test<span class="token punctuation">,</span> y_test<span class="token punctuation">,</span> batch_size<span class="token operator">=</span>batch_size<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'Test score:'</span><span class="token punctuation">,</span> score<span class="token punctuation">)</span>
<span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'Test accuracy:'</span><span class="token punctuation">,</span> acc<span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>
<h4 id="存储模型"><a href="#存储模型" class="headerlink" title="存储模型"></a>存储模型</h4><pre class="line-numbers language-python" data-language="python"><code class="language-python">yaml_string <span class="token operator">=</span> model<span class="token punctuation">.</span>to_yaml<span class="token punctuation">(</span><span class="token punctuation">)</span>
 <span class="token keyword">with</span> <span class="token builtin">open</span><span class="token punctuation">(</span><span class="token string">'lstm.yml'</span><span class="token punctuation">,</span> <span class="token string">'w'</span><span class="token punctuation">)</span> <span class="token keyword">as</span> outfile<span class="token punctuation">:</span>
     outfile<span class="token punctuation">.</span>write<span class="token punctuation">(</span>yaml<span class="token punctuation">.</span>dump<span class="token punctuation">(</span>yaml_string<span class="token punctuation">,</span> default_flow_style<span class="token operator">=</span><span class="token boolean">True</span><span class="token punctuation">)</span><span class="token punctuation">)</span>
 model<span class="token punctuation">.</span>save_weights<span class="token punctuation">(</span><span class="token string">'lstm.h5'</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span></span></code></pre>



<p>其中Metrics是一个回调类，用来输出F1、召回率和精确率：</p>
<pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token keyword">class</span> <span class="token class-name">Metrics</span><span class="token punctuation">(</span>Callback<span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">def</span> <span class="token function">on_train_begin</span><span class="token punctuation">(</span>self<span class="token punctuation">,</span> logs<span class="token operator">=</span><span class="token punctuation">&#123;</span><span class="token punctuation">&#125;</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        self<span class="token punctuation">.</span>val_f1s <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
        self<span class="token punctuation">.</span>val_recalls <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
        self<span class="token punctuation">.</span>val_precisions <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>

    <span class="token keyword">def</span> <span class="token function">on_epoch_end</span><span class="token punctuation">(</span>self<span class="token punctuation">,</span> epoch<span class="token punctuation">,</span> logs<span class="token operator">=</span><span class="token punctuation">&#123;</span><span class="token punctuation">&#125;</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        val_predict <span class="token operator">=</span> <span class="token punctuation">(</span>np<span class="token punctuation">.</span>asarray<span class="token punctuation">(</span>self<span class="token punctuation">.</span>model<span class="token punctuation">.</span>predict<span class="token punctuation">(</span>self<span class="token punctuation">.</span>validation_data<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">)</span><span class="token punctuation">.</span><span class="token builtin">round</span><span class="token punctuation">(</span><span class="token punctuation">)</span>  <span class="token comment">##.model</span>
        val_targ <span class="token operator">=</span> self<span class="token punctuation">.</span>validation_data<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span>  <span class="token comment">###.model</span>
        _val_f1 <span class="token operator">=</span> f1_score<span class="token punctuation">(</span>val_targ<span class="token punctuation">,</span> val_predict<span class="token punctuation">,</span> average<span class="token operator">=</span><span class="token string">'micro'</span><span class="token punctuation">)</span>
        _val_recall <span class="token operator">=</span> recall_score<span class="token punctuation">(</span>val_targ<span class="token punctuation">,</span> val_predict<span class="token punctuation">,</span> average<span class="token operator">=</span><span class="token string">'micro'</span><span class="token punctuation">)</span>  <span class="token comment">###</span>
        _val_precision <span class="token operator">=</span> precision_score<span class="token punctuation">(</span>val_targ<span class="token punctuation">,</span> val_predict<span class="token punctuation">,</span> average<span class="token operator">=</span><span class="token string">'micro'</span><span class="token punctuation">)</span>  <span class="token comment">###</span>
        self<span class="token punctuation">.</span>val_f1s<span class="token punctuation">.</span>append<span class="token punctuation">(</span>_val_f1<span class="token punctuation">)</span>
        self<span class="token punctuation">.</span>val_recalls<span class="token punctuation">.</span>append<span class="token punctuation">(</span>_val_recall<span class="token punctuation">)</span>
        self<span class="token punctuation">.</span>val_precisions<span class="token punctuation">.</span>append<span class="token punctuation">(</span>_val_precision<span class="token punctuation">)</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'_val_f1: '</span><span class="token punctuation">,</span> _val_f1<span class="token punctuation">)</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'_val_recall: '</span><span class="token punctuation">,</span> _val_recall<span class="token punctuation">)</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">'_val_precision: '</span><span class="token punctuation">,</span> _val_precision<span class="token punctuation">)</span>
        fls<span class="token punctuation">.</span>append<span class="token punctuation">(</span>_val_f1<span class="token punctuation">)</span>
        rec<span class="token punctuation">.</span>append<span class="token punctuation">(</span>_val_recall<span class="token punctuation">)</span>
        pre<span class="token punctuation">.</span>append<span class="token punctuation">(</span>_val_precision<span class="token punctuation">)</span>
        <span class="token comment"># print("— val_f1: %f — val_precision: %f — val_recall: %f" %(_val_f1, _val_precision, _val_recall))</span>
        <span class="token keyword">return</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>





                
            </div>
            <hr/>

            

    <div class="reprint" id="reprint-statement">
        
            <div class="reprint__author">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-user">
                        文章作者:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="/about" rel="external nofollow noreferrer">SongX64</a>
                </span>
            </div>
            <div class="reprint__type">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-link">
                        文章链接:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="https://songx86.gitee.io/article/81f45269.html">https://songx86.gitee.io/article/81f45269.html</a>
                </span>
            </div>
            <div class="reprint__notice">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-copyright">
                        版权声明:
                    </i>
                </span>
                <span class="reprint-info">
                    本博客所有文章除特别声明外，均采用
                    <a href="https://creativecommons.org/licenses/by/4.0/deed.zh" rel="external nofollow noreferrer" target="_blank">CC BY 4.0</a>
                    许可协议。转载请注明来源
                    <a href="/about" target="_blank">SongX64</a>
                    !
                </span>
            </div>
        
    </div>

    <script async defer>
      // On every "copy" event, show a toast reminding the reader of the reprint
      // rules; the toast's action button scrolls to the reprint statement.
      document.addEventListener("copy", function () {
        // The action element is a <button>, so it is closed with </button> to keep
        // the injected markup well-formed.
        const toastHTML = '<span>复制成功，请遵循本文的转载规则</span><button class="btn-flat toast-action" onclick="navToReprintStatement()" style="font-size: smaller">查看</button>';
        M.toast({html: toastHTML})
      });

      // Animate the page scroll (800 ms) to 80px above the #reprint-statement block.
      // NOTE(review): the 80px offset presumably clears the fixed navbar — confirm.
      // Must stay a global function: the toast button calls it via inline onclick.
      function navToReprintStatement() {
        $("html, body").animate({scrollTop: $("#reprint-statement").offset().top - 80}, 800);
      }
    </script>



            <div class="tag_share" style="display: block;">
                <div class="post-meta__tag-list" style="display: inline-block;">
                    
                        <div class="article-tag">
                            
                                <a href="/tags/%E7%A7%91%E7%A0%94/">
                                    <span class="chip bg-color">科研</span>
                                </a>
                            
                                <a href="/tags/%E5%AE%9E%E9%AA%8C/">
                                    <span class="chip bg-color">实验</span>
                                </a>
                            
                                <a href="/tags/Python/">
                                    <span class="chip bg-color">Python</span>
                                </a>
                            
                                <a href="/tags/Keras/">
                                    <span class="chip bg-color">Keras</span>
                                </a>
                            
                                <a href="/tags/%E6%96%87%E6%9C%AC%E5%A4%84%E7%90%86/">
                                    <span class="chip bg-color">文本处理</span>
                                </a>
                            
                        </div>
                    
                </div>
                <div class="post_share" style="zoom: 80%; width: fit-content; display: inline-block; float: right; margin: -0.15rem 0;">
                    <link rel="stylesheet" type="text/css" href="/libs/share/css/share.min.css">
<div id="article-share">

    
    <div class="social-share" data-sites="twitter,facebook,google,qq,qzone,wechat,weibo,douban,linkedin" data-wechat-qrcode-helper="<p>微信扫一扫即可分享！</p>"></div>
    <script src="/libs/share/js/social-share.min.js"></script>
    

    

</div>

                </div>
            </div>
            
                <style>
    /* ---- Reward trigger (container with id "reward") ---- */
    #reward {
        margin: 40px 0;
        text-align: center;
    }

    #reward .reward-link {
        font-size: 1.4rem;
        line-height: 38px;
    }

    #reward .btn-floating:hover {
        box-shadow: 0 6px 12px rgba(0, 0, 0, 0.2), 0 5px 15px rgba(0, 0, 0, 0.2);
    }

    /* ---- Reward modal ---- */
    #rewardModal {
        width: 320px;
        height: 350px;
    }

    #rewardModal .reward-title {
        margin: 15px auto;
        padding-bottom: 5px;
    }

    #rewardModal .modal-content {
        padding: 10px;
    }

    /* Close icon pinned to the modal's top-right corner. */
    #rewardModal .close {
        position: absolute;
        right: 15px;
        top: 15px;
        color: rgba(0, 0, 0, 0.5);
        font-size: 1.3rem;
        line-height: 20px;
        cursor: pointer;
    }

    /* Vendor-prefixed transforms come first so the standard property,
       declared last, takes precedence wherever it is supported. */
    #rewardModal .close:hover {
        color: #ef5350;
        -webkit-transform: scale(1.3);
        -moz-transform: scale(1.3);
        -o-transform: scale(1.3);
        transform: scale(1.3);
    }

    /* ---- Tabs inside the modal ---- */
    #rewardModal .reward-tabs {
        margin: 0 auto;
        width: 210px;
    }

    .reward-tabs .tabs {
        height: 38px;
        margin: 10px auto;
        padding-left: 0;
    }

    .reward-content ul {
        padding-left: 0 !important;
    }

    .reward-tabs .tabs .tab {
        height: 38px;
        line-height: 38px;
    }

    /* Inactive tab links use the same grey in both normal and hover states. */
    .reward-tabs .tab a,
    .reward-tabs .tab a:hover {
        color: #fff;
        background-color: #ccc;
    }

    /* The active tab gets its own background colour per payment tab. */
    .reward-tabs .wechat-tab .active {
        color: #fff !important;
        background-color: #22AB38 !important;
    }

    .reward-tabs .alipay-tab .active {
        color: #fff !important;
        background-color: #019FE8 !important;
    }

    /* Fixed-size square image, same width as the tab strip (210px). */
    .reward-tabs .reward-img {
        width: 210px;
        height: 210px;
    }
</style>

<div id="reward">
    <a href="#rewardModal" class="reward-link modal-trigger btn-floating btn-medium waves-effect waves-light red">赏</a>

    <!-- Modal Structure -->
    <div id="rewardModal" class="modal">
        <div class="modal-content">
            <a class="close modal-close"><i class="fas fa-times"></i></a>
            <h4 class="reward-title">你的赏识是我前进的动力</h4>
            <div class="reward-content">
                <div class="reward-tabs">
                    <ul class="tabs row">
                        <li class="tab col s6 alipay-tab waves-effect waves-light"><a href="#alipay">支付宝</a></li>
                        <li class="tab col s6 wechat-tab waves-effect waves-light"><a href="#wechat">微 信</a></li>
                    </ul>
                    <div id="alipay">
                        <img src="/medias/reward/alipay.jpg" class="reward-img" alt="支付宝打赏二维码">
                    </div>
                    <div id="wechat">
                        <img src="/medias/reward/wechat.jpg" class="reward-img" alt="微信打赏二维码">
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>

<script>
    // Once the DOM is ready, call the tabs plugin on every .tabs element
    // (presumably Materialize's jQuery tabs plugin — confirm).
    $(document).ready(function () {
        var tabStrips = $('.tabs');
        tabStrips.tabs();
    });
</script>

            
        </div>
    </div>

    

    

    

    

    
        <style>
    .valine-card {
        margin: 1.5rem auto;
    }

    .valine-card .card-content {
        padding: 20px 20px 5px 20px;
    }

    #vcomments textarea {
        box-sizing: border-box;
        background: url("/medias/comment_bg.png") 100% 100% no-repeat;
    }

    #vcomments p {
        margin: 2px 2px 10px;
        font-size: 1.05rem;
        line-height: 1.78rem;
    }

    #vcomments blockquote p {
        text-indent: 0.2rem;
    }

    #vcomments a {
        padding: 0 2px;
        color: #4cbf30;
        font-weight: 500;
        text-decoration: none;
    }

    #vcomments img {
        max-width: 100%;
        height: auto;
        cursor: pointer;
    }

    #vcomments ol li {
        list-style-type: decimal;
    }

    #vcomments ol,
    ul {
        display: block;
        padding-left: 2em;
        word-spacing: 0.05rem;
    }

    #vcomments ul li,
    ol li {
        display: list-item;
        line-height: 1.8rem;
        font-size: 1rem;
    }

    #vcomments ul li {
        list-style-type: disc;
    }

    #vcomments ul ul li {
        list-style-type: circle;
    }

    #vcomments table, th, td {
        padding: 12px 13px;
        border: 1px solid #dfe2e5;
    }

    #vcomments table, th, td {
        border: 0;
    }

    table tr:nth-child(2n), thead {
        background-color: #fafafa;
    }

    #vcomments table th {
        background-color: #f2f2f2;
        min-width: 80px;
    }

    #vcomments table td {
        min-width: 80px;
    }

    #vcomments h1 {
        font-size: 1.85rem;
        font-weight: bold;
        line-height: 2.2rem;
    }

    #vcomments h2 {
        font-size: 1.65rem;
        font-weight: bold;
        line-height: 1.9rem;
    }

    #vcomments h3 {
        font-size: 1.45rem;
        font-weight: bold;
        line-height: 1.7rem;
    }

    #vcomments h4 {
        font-size: 1.25rem;
        font-weight: bold;
        line-height: 1.5rem;
    }

    #vcomments h5 {
        font-size: 1.1rem;
        font-weight: bold;
        line-height: 1.4rem;
    }

    #vcomments h6 {
        font-size: 1rem;
        line-height: 1.3rem;
    }

    #vcomments p {
        font-size: 1rem;
        line-height: 1.5rem;
    }

    /* Horizontal rule: a single 1px grey top border. */
    #vcomments hr {
        margin: 12px 0;
        border: 0;
        border-top: 1px solid #ccc;
    }

    /* Quoted text: green left bar on a translucent green background. */
    #vcomments blockquote {
        margin: 15px 0;
        border-left: 5px solid #42b983;
        padding: 1rem 0.8rem 0.3rem 0.8rem;
        color: #666;
        background-color: rgba(66, 185, 131, .1);
    }

    /* Code blocks: dark panel that scrolls instead of wrapping. */
    #vcomments pre {
        font-family: monospace, monospace;
        padding: 1.2em;
        margin: .5em 0;
        background: #272822;
        overflow: auto;
        border-radius: 0.3em;
        tab-size: 4;
    }

    /* Inline code: orange text on a light background. */
    #vcomments code {
        font-family: monospace, monospace;
        padding: 1px 3px;
        font-size: 0.92rem;
        color: #e96900;
        background-color: #f8f8f8;
        border-radius: 2px;
    }

    /* Code inside a block: drop the inline padding and use the dark-panel colours. */
    #vcomments pre code {
        font-family: monospace, monospace;
        padding: 0;
        color: #e8eaf6;
        background-color: #272822;
    }

    /* Syntax-highlighted blocks (class contains "language-") inside comments. */
    #vcomments pre[class*="language-"] {
        padding: 1.2em;
        margin: .5em 0;
    }

    /* Base text colour for highlighted code in comments. Both selectors carry
       the #vcomments prefix; an unprefixed pre[class*="language-"] would
       recolour every highlighted code block on the page. */
    #vcomments code[class*="language-"],
    #vcomments pre[class*="language-"] {
        color: #e8eaf6;
    }

    /* Show native checkboxes inside comments, in both states. Each selector
       repeats #vcomments so the override applies to checked and unchecked
       boxes alike and does not touch checkboxes outside the comment area. */
    #vcomments [type="checkbox"]:not(:checked),
    #vcomments [type="checkbox"]:checked {
        position: inherit;
        margin-left: -1.3rem;
        margin-right: 0.4rem;
        margin-top: -1px;
        vertical-align: middle;
        left: unset;
        visibility: visible;
    }

    /* Bold text inside comments; both selectors are scoped to #vcomments so
       <strong> elsewhere on the page keeps its own weight. */
    #vcomments b,
    #vcomments strong {
        font-weight: bold;
    }

    /* Inline text-level elements inside comments. */
    #vcomments dfn {
        font-style: italic;
    }

    #vcomments small {
        font-size: 85%;
    }

    /* Citations are rendered upright rather than italic. */
    #vcomments cite {
        font-style: normal;
    }

    /* Highlighted text: pale yellow background with a little padding. */
    #vcomments mark {
        background-color: #fcf8e3;
        padding: .2em;
    }

    /* Table and checkbox rules for rendered comments. They repeat rules that
       appear earlier in this stylesheet and, coming later, are the ones that
       decide the final border. Every selector in each list is prefixed with
       #vcomments so nothing here applies outside the comment area. */
    #vcomments table,
    #vcomments th,
    #vcomments td {
        padding: 12px 13px;
        border: 1px solid #dfe2e5;
    }

    #vcomments table tr:nth-child(2n),
    #vcomments thead {
        background-color: #fafafa;
    }

    #vcomments table th {
        background-color: #f2f2f2;
        min-width: 80px;
    }

    #vcomments table td {
        min-width: 80px;
    }

    /* Show native checkboxes inside comments, checked or not. */
    #vcomments [type="checkbox"]:not(:checked),
    #vcomments [type="checkbox"]:checked {
        position: inherit;
        margin-left: -1.3rem;
        margin-right: 0.4rem;
        margin-top: -1px;
        vertical-align: middle;
        left: unset;
        visibility: visible;
    }
</style>

<!-- Comment card: a heading row plus the empty #vcomments container that the
     Valine script further down mounts into (el: '#vcomments'). -->
<div class="card valine-card" data-aos="fade-up">
    <div class="comment_headling" style="font-size: 20px; font-weight: 700; position: relative; padding-left: 20px; top: 15px; padding-bottom: 5px;">
        <i class="fas fa-comments fa-fw" aria-hidden="true"></i>
        <span>评论</span>
    </div>
    <div id="vcomments" class="card-content" style="display: grid">
    </div>
</div>

<script src="/libs/valine/av-min.js"></script>
<script src="/libs/valine/Valine.min.js"></script>
<script>
    // Mount the Valine comment widget on the #vcomments container.
    // The boolean flags are string comparisons and evaluate to
    // notify = false, verify = false, visitor = true.
    (function () {
        var valineOptions = {
            el: '#vcomments',
            appId: 'gRuzkj3uDnUjERvWlFkWkYjF-gzGzoHsz',
            appKey: 'tCCMKQDflcsl0hOAh2CUfvPP',
            notify: 'false' === 'true',
            verify: 'false' === 'true',
            visitor: 'true' === 'true',
            avatar: 'mm',
            pageSize: '10',
            lang: 'zh-cn',
            placeholder: 'just go go'
        };

        new Valine(valineOptions);
    })();
</script>

<!--酷Q推送-->


    

    

    

    

    

<!-- Previous / next post cards. Decorative Font Awesome icons are hidden from
     assistive technology (as the comment-card icon already is) and publish
     dates are wrapped in <time> so they are machine-readable. -->
<article id="prenext-posts" class="prev-next articles">
    <div class="row article-row">

        <!-- Previous post -->
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge left-badge text-color">
                <i class="fas fa-chevron-left" aria-hidden="true"></i>&nbsp;上一篇</div>
            <div class="card">
                <a href="/article/b9292b8e.html">
                    <div class="card-image">
                        <img src="/medias/featureimages/1/10.jpg" class="responsive-img" alt="国内外用户画像研究综述 - 徐芳">
                        <span class="card-title">国内外用户画像研究综述 - 徐芳</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        2020年论文， 国内外用户画像研究综述
                    </div>
                    <div class="publish-info">
                        <span class="publish-date">
                            <i class="far fa-clock fa-fw icon-date" aria-hidden="true"></i><time datetime="2020-09-13">2020-09-13</time>
                        </span>
                        <span class="publish-author">
                            <i class="fas fa-bookmark fa-fw icon-category" aria-hidden="true"></i>
                            <a href="/categories/%E7%A7%91%E7%A0%94/" class="post-category">
                                科研
                            </a>
                        </span>
                    </div>
                </div>

                <div class="card-action article-tags">
                    <a href="/tags/%E7%A7%91%E7%A0%94/">
                        <span class="chip bg-color">科研</span>
                    </a>
                    <a href="/tags/%E8%AE%BA%E6%96%87%E7%BF%BB%E8%AF%91/">
                        <span class="chip bg-color">论文翻译</span>
                    </a>
                    <a href="/tags/%E7%94%A8%E6%88%B7%E7%94%BB%E5%83%8F/">
                        <span class="chip bg-color">用户画像</span>
                    </a>
                </div>
            </div>
        </div>

        <!-- Next post -->
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge right-badge text-color">
                下一篇&nbsp;<i class="fas fa-chevron-right" aria-hidden="true"></i>
            </div>
            <div class="card">
                <a href="/article/42e73f82.html">
                    <div class="card-image">
                        <img src="/medias/featureimages/101.jpg" class="responsive-img" alt="Bootstrap基础03——常用组件">
                        <span class="card-title">Bootstrap基础03——常用组件</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        Bootstrap中的一些常用组件，也就是标签。
                    </div>
                    <div class="publish-info">
                        <span class="publish-date">
                            <i class="far fa-clock fa-fw icon-date" aria-hidden="true"></i><time datetime="2020-06-23">2020-06-23</time>
                        </span>
                        <span class="publish-author">
                            <i class="fas fa-bookmark fa-fw icon-category" aria-hidden="true"></i>
                            <a href="/categories/%E5%89%8D%E7%AB%AF/" class="post-category">
                                前端
                            </a>
                        </span>
                    </div>
                </div>

                <div class="card-action article-tags">
                    <a href="/tags/%E5%89%8D%E7%AB%AF/">
                        <span class="chip bg-color">前端</span>
                    </a>
                    <a href="/tags/Bootstrap/">
                        <span class="chip bg-color">Bootstrap</span>
                    </a>
                </div>
            </div>
        </div>

    </div>
</article>

</div>



<!-- 代码块功能依赖 -->
<script type="text/javascript" src="/libs/codeBlock/codeBlockFuction.js"></script>

<!-- 代码语言 -->

<script type="text/javascript" src="/libs/codeBlock/codeLang.js"></script>


<!-- 代码块复制 -->

<script type="text/javascript" src="/libs/codeBlock/codeCopy.js"></script>


<!-- 代码块收缩 -->

<script type="text/javascript" src="/libs/codeBlock/codeShrink.js"></script>


    </div>
    <!-- Table-of-contents sidebar. #toc-content is the tocSelector passed to
         tocbot.init in the script below; .toc-widget gets the "toc-fixed"
         class toggled by that script's scroll handler. -->
    <div id="toc-aside" class="expanded col l3 hide-on-med-and-down">
        <div class="toc-widget card" style="background-color: white;">
            <div class="toc-title"><i class="far fa-list-alt"></i>&nbsp;&nbsp;目录</div>
            <div id="toc-content"></div>
        </div>
    </div>
</div>

<!-- Floating TOC button. Its click handler (bound in the script below on
     "#floating-toc-btn .btn-floating") shows or hides the #toc-aside sidebar. -->

<div id="floating-toc-btn" class="hide-on-med-and-down">
    <a class="btn-floating btn-large bg-color">
        <i class="fas fa-list-ul"></i>
    </a>
</div>


<script src="/libs/tocbot/tocbot.min.js"></script>
<script>
    /*
     * Article table of contents.
     *
     * On DOM ready this:
     *   1. builds the TOC with tocbot,
     *   2. rewrites TOC hrefs and heading ids to sequential ASCII anchors
     *      ("toc-heading-1", "toc-heading-2", ...) so headings with Chinese
     *      text still resolve,
     *   3. pins the TOC widget once the page is scrolled past a threshold,
     *   4. wires the floating button that expands/collapses the TOC sidebar.
     */
    $(function () {
        // One selector shared by tocbot and the id rewrite below, so both
        // enumerate the same headings in the same document order.
        const headingSelector = 'h2, h3, h4';
        const tocHeading = 'toc-heading-';

        tocbot.init({
            tocSelector: '#toc-content',
            contentSelector: '#articleContent',
            headingsOffset: -($(window).height() * 0.4 - 45),
            collapseDepth: Number('0'),
            headingSelector: headingSelector
        });

        // Point the n-th TOC link at "#toc-heading-n".
        $('#toc-content a').each(function (index) {
            $(this).attr('href', '#' + tocHeading + (index + 1));
        });

        // Give the n-th heading the id "toc-heading-n". tocbot collects every
        // descendant of the content element that matches headingSelector, so
        // find() is used here: children() would skip headings nested inside
        // other elements (e.g. a blockquote) and shift all following anchors.
        $('#articleContent').find(headingSelector).each(function (index) {
            $(this).attr('id', tocHeading + (index + 1));
        });

        // Pin the TOC widget once the page is scrolled past the threshold.
        const tocHeight = Math.trunc($(window).height() * 0.4 - 64);
        const $tocWidget = $('.toc-widget');
        $(window).scroll(function () {
            if ($(window).scrollTop() > tocHeight) {
                $tocWidget.addClass('toc-fixed');
            } else {
                $tocWidget.removeClass('toc-fixed');
            }
        });

        /*
         * Sets the width of #targetId to the width of #srcId plus a
         * width-dependent allowance. Does nothing when #srcId does not exist.
         */
        const fixPostCardWidth = function (srcId, targetId) {
            const $src = $('#' + srcId);
            if ($src.length === 0) {
                return;
            }

            const srcWidth = $src.width();
            let extra;
            if (srcWidth >= 450) {
                extra = 21;
            } else if (srcWidth >= 350) {
                extra = 18;
            } else if (srcWidth >= 300) {
                extra = 16;
            } else {
                extra = 14;
            }
            $('#' + targetId).width(srcWidth + extra);
        };

        // Expand / collapse the TOC sidebar. The main column carries the "l9"
        // grid class only while the sidebar is shown.
        const expandedClass = 'expanded';
        const $tocAside = $('#toc-aside');
        const $mainContent = $('#main-content');
        $('#floating-toc-btn .btn-floating').click(function () {
            if ($tocAside.hasClass(expandedClass)) {
                $tocAside.removeClass(expandedClass).hide();
                $mainContent.removeClass('l9');
            } else {
                $tocAside.addClass(expandedClass).show();
                $mainContent.addClass('l9');
            }
            fixPostCardWidth('artDetail', 'prenext-posts');
        });
    });
</script>

    

</main>




    <footer class="page-footer bg-color">
    
        <link rel="stylesheet" href="/libs/aplayer/APlayer.min.css">
<style>
    /* APlayer lyric lines: hidden, with typography kept for when they are shown. */
    .aplayer .aplayer-lrc p {
        display: none;
        font-size: 12px;
        font-weight: 700;
        line-height: 16px !important;
    }

    /* Currently playing lyric line: also hidden. */
    .aplayer .aplayer-lrc p.aplayer-lrc-current {
        display: none;
        font-size: 15px;
        color: #42b983;
    }

    /* Fixed, narrow player: parked 66px off the left edge ... */
    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body {
        left: -66px !important;
    }

    /* ... and brought fully into view on hover. */
    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body:hover {
        left: 0 !important;
    }
</style>
<!-- Music player element (<meting-js>): NetEase playlist 767683882, fixed
     mode, no autoplay, random order, folded list. -->
<div class="">
    <div class="row">
        <meting-js class="col l8 offset-l2 m10 offset-m1 s12"
                   server="netease" type="playlist" id="767683882"
                   fixed="true" autoplay="false" theme="#42b983"
                   loop="all" order="random" preload="auto"
                   volume="0.7" list-folded="true">
        </meting-js>
    </div>
</div>

<script src="/libs/aplayer/APlayer.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/meting@2/dist/Meting.min.js"></script>

    
    <!-- Footer: copyright / filing information and social links. -->
    <div class="container row center-align" style="margin-bottom: 0px !important;">
        <div class="col s12 m8 l8 copy-right">
            Copyright&nbsp;&copy;
            <!-- One year element only: ids must be unique in the document. -->
            <span id="year">2020-2021</span>
            <a href="/about" target="_blank">SongX64</a>
            |&nbsp;Powered by&nbsp;<a href="https://hexo.io/" target="_blank" rel="noopener">Hexo</a>
            |&nbsp;Theme&nbsp;<a href="https://github.com/blinkfox/hexo-theme-matery" target="_blank" rel="noopener">Matery</a>
            <br>
            <a href="https://beian.miit.gov.cn/" target="_blank" rel="noopener">鲁ICP备2021015839号-1</a>
            |
            <!-- Decorative badge: the adjacent link text carries the meaning. -->
            <img src="/medias/icp.png" alt="" style="vertical-align: text-bottom;">
            <a href="http://www.beian.gov.cn/portal/registerSystemInfo?recordcode=37083102000087" target="_blank" rel="noopener">鲁公网安备 37083102000087号</a>
            <br>
            <br>
            <br>
        </div>

        <!-- Icon-only links: aria-label repeats the tooltip text so each link
             has an accessible name; the icon itself is hidden from AT. -->
        <div class="col s12 m4 l4 social-link ">
            <a href="https://github.com/SongX64" class="tooltipped" target="_blank" rel="noopener" data-tooltip="访问我的GitHub" data-position="top" data-delay="50" aria-label="访问我的GitHub">
                <i class="fab fa-github" aria-hidden="true"></i>
            </a>

            <a href="mailto:song.x64@gmail.com" class="tooltipped" target="_blank" data-tooltip="邮件联系我" data-position="top" data-delay="50" aria-label="邮件联系我">
                <i class="fas fa-envelope-open" aria-hidden="true"></i>
            </a>

            <a href="tencent://AddContact/?fromId=50&amp;fromSubId=1&amp;subcmd=all&amp;uin=song-x64@qq.com" class="tooltipped" target="_blank" data-tooltip="QQ联系我: song-x64@qq.com" data-position="top" data-delay="50" aria-label="QQ联系我: song-x64@qq.com">
                <i class="fab fa-qq" aria-hidden="true"></i>
            </a>
        </div>
    </div>
</footer>

<div class="progress-bar"></div>


    <!-- Search modal. The input has no visible <label>, so aria-label gives it
         an accessible name (a placeholder alone is not one). -->
<div id="searchModal" class="modal">
    <div class="modal-content">
        <div class="search-header">
            <span class="title"><i class="fas fa-search" aria-hidden="true"></i>&nbsp;&nbsp;搜索</span>
            <input type="search" id="searchInput" name="s" placeholder="请输入搜索的关键字"
                   class="search-input" aria-label="搜索">
        </div>
        <div id="searchResult"></div>
    </div>
</div>

<script type="text/javascript">
$(function () {
    /**
     * Wires up local full-text search.
     *
     * Loads the XML search index once, converts every entry to plain text and,
     * on each "input" event of the search box, lists the entries whose title
     * or content contains ALL typed keywords, with a highlighted excerpt.
     *
     * @param {string} path        URL of the XML search index.
     * @param {string} search_id   id of the search input element.
     * @param {string} content_id  id of the element that receives the results.
     */
    var searchFunc = function (path, search_id, content_id) {
        'use strict';

        var HTML_ESCAPES = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};

        // Escapes text for use in HTML content and in quoted attribute values.
        var escapeHtml = function (text) {
            return String(text).replace(/[&<>"']/g, function (ch) {
                return HTML_ESCAPES[ch];
            });
        };

        // Escapes regular-expression metacharacters so a keyword such as
        // "c++" or "(" is matched literally instead of throwing or mis-matching.
        var escapeRegExp = function (text) {
            return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        };

        // Converts an HTML fragment to plain text. DOMParser builds an inert
        // document, so nothing in the fragment is executed or loaded.
        var htmlToText = function (html) {
            var doc = new DOMParser().parseFromString(html, 'text/html');
            return (doc.body ? doc.body.textContent : '').trim();
        };

        $.ajax({
            url: path,
            dataType: "xml",
            success: function (xmlResponse) {
                var $input = document.getElementById(search_id);
                var $resultContent = document.getElementById(content_id);
                if (!$input || !$resultContent) {
                    return;
                }

                // Normalize the index once instead of on every keystroke.
                var datas = $("entry", xmlResponse).map(function () {
                    var title = $("title", this).text().trim();
                    var text = htmlToText($("content", this).text());
                    var url = $("url", this).text();
                    return {
                        title: title,
                        titleLower: title.toLowerCase(),
                        text: text,
                        textLower: text.toLowerCase(),
                        url: url.indexOf('/') === 0 ? url : '/' + url
                    };
                }).get();

                $input.addEventListener('input', function () {
                    var query = this.value.trim().toLowerCase();
                    $resultContent.innerHTML = "";
                    if (query.length <= 0) {
                        return;
                    }

                    // Splitting "foo-" or "-" yields empty fragments; an empty
                    // keyword would match (and "highlight") everything.
                    var keywords = query.split(/[\s\-]+/).filter(function (keyword) {
                        return keyword.length > 0;
                    });
                    if (keywords.length === 0) {
                        return;
                    }

                    // One capturing alternation of all keywords: split() then
                    // returns non-matching text at even indexes and matched
                    // text at odd indexes.
                    var highlightRe = new RegExp('(' + keywords.map(escapeRegExp).join('|') + ')', 'gi');

                    var str = '<ul class="search-result-list">';
                    datas.forEach(function (data) {
                        // only match articles with non-empty titles and contents
                        if (data.titleLower === '' || data.textLower === '') {
                            return;
                        }

                        var first_occur = -1;
                        var isMatch = keywords.every(function (keyword, i) {
                            var index_title = data.titleLower.indexOf(keyword);
                            var index_content = data.textLower.indexOf(keyword);
                            if (index_title < 0 && index_content < 0) {
                                return false;
                            }
                            if (i === 0) {
                                // Title-only hit: start the excerpt at the beginning.
                                first_occur = Math.max(index_content, 0);
                            }
                            return true;
                        });
                        if (!isMatch) {
                            return;
                        }

                        // Cut out about 100 characters around the first keyword.
                        var start = Math.max(first_occur - 20, 0);
                        var end = start === 0 ? 100 : first_occur + 80;
                        end = Math.min(end, data.text.length);
                        // substring(start, end) takes two positions; substr()
                        // would treat the second argument as a length.
                        var excerpt = data.text.substring(start, end);

                        // Escape every fragment and wrap the matched ones, so
                        // the highlight markup can never be matched itself.
                        var match_content = excerpt.split(highlightRe).map(function (part, idx) {
                            var safe = escapeHtml(part);
                            return idx % 2 === 1
                                ? '<em class="search-keyword">' + safe + '</em>'
                                : safe;
                        }).join('');

                        str += "<li><a href='" + escapeHtml(data.url) + "' class='search-result-title'>" + escapeHtml(data.title) + "</a>";
                        str += "<p class=\"search-result\">" + match_content + "...</p>";
                        str += "</li>";
                    });
                    str += "</ul>";
                    $resultContent.innerHTML = str;
                });
            }
        });
    };

    searchFunc('/search.xml', 'searchInput', 'searchResult');
});
</script>

    <!-- Back-to-top button. The link contains only an icon, so aria-label
         supplies its accessible name. -->
<div id="backTop" class="top-scroll">
    <a class="btn-floating btn-large waves-effect waves-light" href="#!" aria-label="回到顶部">
        <i class="fas fa-arrow-up" aria-hidden="true"></i>
    </a>
</div>


    <script src="/libs/materialize/materialize.min.js"></script>
    <script src="/libs/masonry/masonry.pkgd.min.js"></script>
    <script src="/libs/aos/aos.js"></script>
    <script src="/libs/scrollprogress/scrollProgress.min.js"></script>
    <script src="/libs/lightGallery/js/lightgallery-all.min.js"></script>
    <script src="/js/matery.js"></script>

    <!-- Baidu Analytics -->

    <!-- Baidu Push -->

<script>
    // Baidu push: inject the push script that matches the page protocol,
    // placing it before the first <script> element of the document.
    (function () {
        var isHttps = window.location.protocol.split(':')[0] === 'https';
        var pushScript = document.createElement('script');
        pushScript.src = isHttps
            ? 'https://zz.bdstatic.com/linksubmit/push.js'
            : 'http://push.zhanzhang.baidu.com/push.js';

        var firstScript = document.getElementsByTagName("script")[0];
        firstScript.parentNode.insertBefore(pushScript, firstScript);
    })();
</script>

    
    <script src="/libs/others/clicklove.js" async="async"></script>
    
    

    

    

    <!-- Tencent Tuxiaochao: icon link to the support.qq.com product page,
         fixed at the bottom-right of the viewport; opens in a new tab. -->
    
        <div style="position:fixed;bottom:125px;right:16px;cursor: pointer;">
            <a title="兔小巢" target="_blank" rel="noopener" href="https://support.qq.com/products/330452"><i class="fa fa-comments fa-3x"  aria-hidden="true"></i></a>
        </div>
    
    
    <script type="text/javascript" color="0,0,255"
        pointColor="0,0,255" opacity='0.5'
        zIndex="-1" count="99"
        src="/libs/background/canvas-nest.js"></script>
    

    

    
    <script type="text/javascript" src="/libs/background/ribbon-dynamic.js" async="async"></script>
    

    
    <script src="/libs/instantpage/instantpage.js" type="module"></script>
    

<script src="/live2dw/lib/L2Dwidget.min.js?094cbace49a39548bed64abff5988b05"></script><script>L2Dwidget.init({"pluginRootPath":"live2dw/","pluginJsPath":"lib/","pluginModelPath":"assets/","tagMode":false,"log":false,"model":{"jsonPath":"/live2dw/assets/shizuku.model.json"},"display":{"position":"right","width":150,"height":300},"mobile":{"show":true},"react":{"opacity":0.7}});</script></body>

</html>
